# -*- coding: utf-8 -*-
# @Time    : 2025/08/25 15:16
# @Author  : Mr.su
# @FileName: start_spiders.py
# @FileDesc: Polls the website_list table in MySQL and launches scrapy spiders.
import threading, pymysql, time, os, subprocess, datetime


class Manager:
    """Polls the ``website_list`` table and launches one detached scrapy
    crawl (with a persistent JOBDIR) for every website whose
    ``collect_status`` is 1, repeating once every 24 hours.
    """

    # Shell template: {0} = scrapy project dir, {1} = spider name,
    # {2} = JOBDIR suffix.  The trailing '&' detaches the crawl so the
    # polling loop is not blocked.
    command = 'cd {} && scrapy crawl {} -s JOBDIR=disk_file/{} &'

    def get_spiders(self):
        # Placeholder — spider discovery is not implemented yet.
        pass

    def starts_spiders(self, website_id):
        """Launch the scrapy spider named *website_id* in the background.

        The JOBDIR gets a per-run timestamp suffix so a restart does not
        collide with a previous crawl's persisted request queue.
        """
        # str() guards against a numeric website_id coming back from MySQL
        # (the original `website_id + str(...)` would raise TypeError then).
        ts = str(website_id) + str(int(time.time() * 10000))
        command = self.command.format(PATH, website_id, ts)
        subprocess.run(command, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        # BUG FIX: the original logged undefined name `spider` (NameError).
        print('[info]: {} 开启爬虫{}'.format(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), website_id))

    def open_thread(self, website_id):
        """Run starts_spiders on its own thread so the poll loop keeps going."""
        t = threading.Thread(target=self.starts_spiders, args=(website_id,))
        t.start()

    def run(self):
        """Poll the DB forever; start a spider per enabled website each cycle."""
        while True:
            conn = pymysql.connect(**MYSQL_CONFIG)
            try:
                # pymysql cursors support the context-manager protocol.
                with conn.cursor() as cur:
                    cur.execute('select * from website_list where collect_status=1')
                    line_data = cur.fetchall()
                    key_lis = cur.description
            finally:
                # FIX: the original leaked one open connection per cycle.
                conn.close()
            for line in line_data:
                # Map column names (cursor.description) onto the row tuple.
                dic = dict(zip([i[0] for i in key_lis], line))
                if dic['collect_status'] != 1:  # defensive re-check of the WHERE clause
                    continue
                print('[info]: {} 获取爬虫配置{}'.format(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), dic['website_id']))
                self.open_thread(dic['website_id'])
                time.sleep(5)  # stagger spider start-up
            time.sleep(60 * 60 * 24)  # next poll in 24 hours


if __name__ == '__main__':
    # NOTE(review): credentials are hard-coded in source — move them to
    # environment variables or a config file before deploying.
    MYSQL_CONFIG = {
        "host": "192.168.1.16",  # server address
        "port": 3307,  # port
        "user": "wow_trend",  # username
        "password": "qorzi+7yDX$E$Yg",  # password
        "database": "wow_pc",  # database name
        "charset": "utf8"  # character encoding
    }
    # Alternative local-development config (kept for reference):
    # MYSQL_CONFIG = {
    #     "host": "127.0.0.1",  # server address
    #     "port": 3306,  # port
    #     "user": "root",  # username
    #     "password": "root",  # password
    #     "database": "crawl_center",  # database name
    #     "charset": "utf8"  # character encoding
    # }
    # Scrapy project directory; backslashes normalized so the path is
    # usable inside the shell command template.
    PATH = os.getcwd().replace('\\', '/') + '/CrawlCenterSpiders'
    m = Manager()
    m.run()  # blocks forever: polls the DB and launches spiders
